library(ape)
library(phangorn)
library(caper)
library(tidyverse)
library(ggtree)
library(picante)
library(brms)
library(phangorn)
library(phytools)
library(treeio)
library(MASS)
library(car)
library(corrplot)
library(emmeans)
library(broom)
library(ggdist)
library(tidybayes)
library(raster)
library(sf)
library(exactextractr)
library(performance)##Check ultrametric and/or fix
check_and_fix_ultrametric <- function(phy){
if (!is.ultrametric(phy)){
vv <- vcv.phylo(phy)
dx <- diag(vv)
mxx <- max(dx) - dx
for (i in 1:length(mxx)){
phy$edge.length[phy$edge[,2] == i] <- phy$edge.length[phy$edge[,2] == i] + mxx[i]
}
if (!is.ultrametric(phy)){
stop("Ultrametric fix failed\n")
}
}
return(phy)
}
##Removed duplicated tips
remove_duplicate_tips<-function(tree){
for(spe in unique(tree$tip.label)){
pos<-grep(paste("\\b",spe,"\\b",sep=""),tree$tip.label)
if(length(pos)>1){
rem<-pos[2:length(pos)]
tree<-ape::drop.tip(phy=tree,tip=rem)
}
}
return(tree)
}
## Function for renaming tips
rename.tips.phylo <- function(tree, names) {
tree$tip.label <- names
return(tree)
}
#Stardarize variables
standard_varibles<-function(frame_data){
for(nom in names(frame_data)){
frame_data<-data.frame(frame_data)
if(class(frame_data[,nom])!="numeric"){next}
frame_data[,nom]<-scale(frame_data[,nom])[,1]
}
return(frame_data)
}
#standarize single variables
scale_single <- function(x){
(x - mean(x, na.rm=TRUE)) / sd(x, na.rm=TRUE)
}
#Standard error function
se <- function(x) sd(x)/sqrt(length(x))
#Mean function
meanfun <- function(data, i){
d <- data[i, ]
return(mean(d))
}
#Variation coefficient
var_coef <- function(x, na.rm = FALSE) {
sd(x, na.rm=na.rm) / mean(x, na.rm=na.rm)
}We are going to load the phylogenies of the two main families (Araneidae and Theridiidae) obtained in BEAST to removed duplicated tips and prune some outgroups to generate a single phylogeny for further analyses.
#Load Araneidae tree
mcc_tree<-read.nexus("araneidae_new_final.tre")
#load fixed tip names
nam_tree<-read_csv("tip_names_araneidae.csv",col_names=T)
mcc_tree$tip.label<-nam_tree$corrected_name
###Remove repeated tips
tree_removedTips<-remove_duplicate_tips(mcc_tree)
##Remove tips to mix with Theridiidae tree
core<-extract.clade(phy=tree_removedTips,node=c(194), collapse.singles = TRUE,interactive = FALSE)
outgroups<- tree_removedTips$tip.label[which(tree_removedTips$tip.label %in% core$tip.label==FALSE)]
outgroups_araneidae<-outgroups
tree_removedTips<-drop.tip(tree_removedTips,outgroups)#Load Theridiidae tree
tree_theridiidae<-read.nexus("total_Theridiidae_tree.tre")
#load fixed tip names
nam_tree<-read_delim("theridiidae_tips.txt",col_names=F)
tree_theridiidae$tip.label<-nam_tree$X2
#Remove repeated tips
tree_removed_theridiidae<-remove_duplicate_tips(tree_theridiidae)
#remove problematic tips
problematic_tips<-c("Chrysso_albipes","Chrysso_sp","Erigone_dentosa")
tree_removed_theridiidae<-drop.tip(tree_removed_theridiidae,c(problematic_tips))
#Identify the outgroup
#plotTree(tree_removed_theridiidae)
#nodelabels()
outgroups<-extract.clade(phy=tree_removed_theridiidae, node=304, root.edge = 0, collapse.singles = TRUE,interactive = FALSE) #Keep an eye on the node
outgroups_theridiidae<-outgroupsNow with the two phylogenies, we are going to join them fro further analyses
calib<-makeChronosCalib(tree_removed_theridiidae, age.min = max(node.depth.edgelength(tree_removedTips)), age.max = max(node.depth.edgelength(tree_removedTips)))
tmp_t<-chronos(tree_removed_theridiidae, lambda = 1, model = "correlated", quiet = FALSE,
calibration = calib,
control = chronos.control())##
## Setting initial dates...
## Fitting in progress... get a first set of estimates
## (Penalised) log-lik = -628.3153
## Optimising rates... dates... -628.3153
##
## log-Lik = -628.3153
## PHIIC = 2190.63
joint_trees_outgroups<-bind.tree(tree_removedTips,tmp_t, interactive = FALSE)
joint_trees<-bind.tree(tree_removedTips, ape::drop.tip(phy=tmp_t,outgroups$tip.label), interactive = FALSE)
# get scaled edge.length
joint_trees$edge.length <- joint_trees$edge.length / (max(joint_trees$edge.length))
#Remove duplicate tips
joint_trees<-remove_duplicate_tips(joint_trees)
#Check that the final tree is ultrametric
joint_trees<-check_and_fix_ultrametric(joint_trees)#load the csv file
join_dataset<-read_csv("data_total.csv",col_names=T)
#replace spaces in species names
join_dataset$species<-gsub(pattern=" ", replacement="_",join_dataset$species)
#keep unique rows
join_dataset<-distinct(join_dataset,species,.keep_all = TRUE)
#Tranform presence in islands to a binary variable
join_dataset<-join_dataset %>% mutate(bin_island=ifelse(cat_island=="island"|cat_island=="island_continent",1,0))
#replace spaces in species names on the tree
join_tree<-multi2di(joint_trees)
join_tree$tip.label<-gsub(pattern=" ", replacement="_",join_tree$tip.label)
##Check if the table match the tree tips
#remove species that are not in the phylogeny
join_dataset<-join_dataset[join_dataset$species %in% join_tree$tip.label,]
##Add species with no information into the phylogeny, like XX_sp
for(spe in unique(join_tree$tip.label)){
#Remove
if(spe %in% join_dataset$species==FALSE){
print(spe)
join_dataset<-join_dataset %>% add_row(species=spe)
}
}
#Let's modify the dataset to deal with colour polytipic species
join_dataset$polymorphism[which(join_dataset$polymorphism %in% c("polytipic","possible polytipic","pattern variable")==TRUE)]<-NA
join_dataset<-join_dataset %>% filter(!is.na(polymorphism))
dataset_all_species_phylogeny<-join_datasetNow we have a single and a dataset that match each other.
Let’s see how is the presence of colour polymorphism present in the phylogeny
#Change tree name
to_plot_tree<-join_tree
#Find colour polymorphic lineages
otus<-join_dataset %>% filter(polymorphism=="yes") %>% pull(species)
to_plot_tree<-groupOTU(to_plot_tree, otus)
df_polymorphism<-data.frame(join_dataset$polymorphism)
#df_island<-data.frame(as.character(join_dataset$bin_island))
rownames(df_polymorphism)<-join_dataset$species
#Plot tree with names
p<-ggtree(to_plot_tree, layout='circular') + geom_tiplab()
#pdf("total_tree_names.pdf", width=20,height=20)
#plot(p)
#dev.off()
#Plot tree colour polymorphism
p<-ggtree(to_plot_tree, layout='circular')
#pdf("total_tree_polymorphism.pdf", width=20,height=20)
gheatmap(p, df_polymorphism, offset=.001, width=.08,colnames = FALSE, colnames_offset_y = 1)+scale_fill_manual(values=c("#1ABEC6","#FF5B00"),name="Presence of\ncolour polymorphism")#dev.off()Arachnids is one of the groups with the poorest geographic information available in public databases.For instance, in our data ~51% of the species has less than 50 geographical records
species_points<-join_dataset %>% drop_na(n_points)
species_geo<-nrow(species_points[species_points$n_points<50,])/nrow(species_points)*100
print(paste0(species_geo,"%"," of the species with geographical information has less than 50 geographical records"))## [1] "52.6881720430108% of the species with geographical information has less than 50 geographical records"
To account for this, we decided to calculated the mean and its 95% confidence interval (CI) for the number of geographical records available for all the species. We excluded species from the subsequent analyses that fell outside the lower CI.
#Due to high vari
l_points<-na.omit(log(join_dataset$n_points))
##Let's calculate the 95% around the mean
library(boot)
data <- data.frame(xs = l_points)
bo <- boot(data[, "xs", drop = FALSE], statistic=meanfun, R=5000)
mean_ci<-boot.ci(bo, conf=0.95, type="bca")
ggplot(tibble(x=bo$t[,1]), aes(x=x)) +geom_density()+geom_segment(x=mean_ci$bca[4],xend=mean_ci$bca[5],y=0,yend=0,color="blue",size=2,lineend="round")##Remove species with low number of records
datos_filtered<-join_dataset %>% filter(n_points>=exp(mean_ci$bca[4]))
#let's keep this filtered dataset for further analyses
data_filtered_phylogeny<-datos_filteredall the predictors seems skewed or not uniform distributed, let’s modify some predictors that may affect the regression due to their non-normal distribution
datos_filtered$T_centroid_lat<-abs(datos_filtered$centroid_lat)
datos_filtered$T_lat_range<-sqrt(abs(datos_filtered$lat_range))
datos_filtered$T_area_polygon<-sqrt(datos_filtered$area_polygon+1)
datos_filtered$T_lat_range_wsc<-abs(datos_filtered$lat_range_wsc)
datos_filtered$T_area_countries_wsc<-sqrt(datos_filtered$area_countries_wsc)
datos_filtered$T_points<-log(datos_filtered$n_points)
datos_filtered$T_bole<-log(datos_filtered$bole_female)Remove species from islands that can affect calculations due to their geographic limit for dispersion
datos_filtered_total<-datos_filtered %>% filter(cat_island != "island") %>% data.frame()Number of colour monomorphic and polymorphic species
table(datos_filtered_total$polymorphism)##
## no yes
## 55 33
Correlation plot between the variables
cor_matrix <- cor(na.omit(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))
colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )Standardize variable before the analysis, excluding the count variables
datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()Now, let’s prepare the dataset and tree so they match, this is super important. Your phylogeny names need to match a column of data
Let’s run the models!
Evaluate if the colour monomorphic and colour polymorphic species differ in the number of records
set.seed(30011994)
brm_points <- brm(
n_points ~ polymorphism+(1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
## Warning: There were 40000 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 1.06, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
#pairs(brm_points)
summary(brm_points)## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Family: poisson
## Links: mu = log
## Formula: n_points ~ polymorphism + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 88)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 88)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.92 0.21 2.53 3.36 1.03 113 310
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 6.01 0.92 4.26 7.78 1.05 60 109
## polymorphismyes 0.88 0.48 -0.11 1.80 1.05 60 141
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(brm_points)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 1.000 (95% CI [1.000, 1.000])
## Marginal R2: 0.014 (95% CI [4.560e-11, 0.269])
#
pp_check(brm_points)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_points))They do not have differences in the number of records
Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid
set.seed(30011994)
brm_centroid <- brm(
T_centroid_lat ~ polymorphism+(1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = skew_normal(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 1000
)## Compiling Stan program...
## Start sampling
## Warning: There were 3 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
pairs(brm_centroid)summary(brm_centroid)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_centroid_lat ~ polymorphism + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 88)
## Draws: 4 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup draws = 2000
##
## Group-Level Effects:
## ~species (Number of levels: 88)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.88 0.18 0.56 1.26 1.02 114 209
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.04 0.31 -0.68 0.50 1.01 488 411
## polymorphismyes -0.13 0.21 -0.53 0.27 1.00 1052 1344
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.56 0.14 0.27 0.82 1.02 106 138
## alpha -3.80 2.82 -9.56 1.77 1.00 548 621
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_centroid)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.674 (95% CI [0.425, 0.953])
## Marginal R2: 0.007 (95% CI [2.683e-09, 0.051])
## Check if the predicted values fits the rawdata
pp_check(brm_centroid)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_centroid))They do not have differences in the latitude of the centroid
Evaluate if the colour monomorphic and colour polymorphic species differ in the body length
set.seed(30011994)
brm_bole <- brm(
T_bole ~ polymorphism+(1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = gaussian(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
## Warning: There were 16 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 384 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
pairs(brm_bole)summary(brm_bole)## Warning: There were 16 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_bole ~ polymorphism + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 87)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 87)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.99 0.11 0.78 1.20 1.00 1369 3447
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.03 0.32 -0.61 0.66 1.00 3377 4174
## polymorphismyes -0.09 0.18 -0.44 0.26 1.00 4315 10220
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.24 0.11 0.05 0.47 1.01 315 153
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(brm_bole)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.947 (95% CI [0.823, 1.000])
## Marginal R2: 0.004 (95% CI [1.427e-12, 0.032])
pp_check(brm_bole)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_bole))They do not have differences in body length
let’s Evaluate the association fo the predictors
lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=datos_filtered_total)
check_collinearity(lm_lat_range)The predictors are not collinear, we can use all of them in the models
set.seed(30011994)
brm_latrange_1 <- brm(
T_lat_range ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = gaussian(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(brm_latrange_1)summary(brm_latrange_1)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 87)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 87)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.47 0.29 0.03 1.09 1.00 2027 2770
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.13 0.22 -0.56 0.31 1.00 17954 14689
## polymorphismyes 0.46 0.23 0.02 0.91 1.00 15162 23355
## T_bole 0.14 0.13 -0.10 0.39 1.00 27163 24394
## T_centroid_lat -0.38 0.12 -0.61 -0.14 1.00 25869 29033
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.81 0.13 0.50 1.02 1.00 2455 2539
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(brm_latrange_1)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.368 (95% CI [0.134, 0.719])
## Marginal R2: 0.281 (95% CI [0.126, 0.423])
pp_check(brm_latrange_1)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_latrange_1))area_polygon_1 <- brm(
T_area_polygon ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)),
data = datos_filtered_total,
family = skew_normal(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(area_polygon_1)summary(area_polygon_1)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 86)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 86)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.25 0.17 0.01 0.62 1.00 9849 15045
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.05 0.17 -0.38 0.30 1.00 47271 29072
## polymorphismyes 0.30 0.21 -0.11 0.72 1.00 52964 31354
## T_bole 0.12 0.12 -0.11 0.36 1.00 50018 30576
## T_centroid_lat 0.12 0.13 -0.12 0.37 1.00 55075 30681
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.96 0.09 0.80 1.15 1.00 30978 24118
## alpha 3.56 1.54 1.35 7.45 1.00 24221 18226
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(area_polygon_1)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.103 (95% CI [0.005, 0.253])
## Marginal R2: 0.060 (95% CI [0.001, 0.154])
pp_check(area_polygon_1)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(area_polygon_1))To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records
set.seed(30011994)
eco_reg_points_1 <- brm(
eco_reg_points ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(eco_reg_points_1)summary(eco_reg_points_1)## Family: poisson
## Links: mu = log
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 87)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 87)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.25 0.12 1.04 1.50 1.00 8809 15355
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.09 0.40 2.31 3.86 1.00 5945 11406
## polymorphismyes 0.41 0.21 -0.00 0.83 1.00 6282 12313
## T_bole 0.27 0.14 -0.01 0.55 1.00 7752 13787
## T_centroid_lat 0.12 0.12 -0.11 0.35 1.00 6849 13123
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(eco_reg_points_1)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.964 (95% CI [0.947, 0.977])
## Marginal R2: 0.112 (95% CI [1.779e-04, 0.387])
pp_check(eco_reg_points_1)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(eco_reg_points_1))BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon
eco_reg_polygon_1 <- brm(
eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(eco_reg_polygon_1)summary(eco_reg_polygon_1)## Family: poisson
## Links: mu = log
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 86)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 86)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.34 0.12 1.13 1.59 1.00 6283 11297
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.73 0.43 2.91 4.57 1.00 4766 8879
## polymorphismyes 0.43 0.23 -0.01 0.88 1.00 4972 8896
## T_bole 0.34 0.15 0.04 0.63 1.00 5044 9268
## T_centroid_lat -0.20 0.12 -0.45 0.04 1.00 5055 8872
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_polygon_1)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.985 (95% CI [0.978, 0.991])
## Marginal R2: 0.212 (95% CI [0.013, 0.505])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_polygon_1)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(eco_reg_polygon_1))Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here
temp_zones_points_1 <- brm(
temp_zones_points~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(temp_zones_points_1)summary(temp_zones_points_1)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 87)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 87)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.11 0.08 0.00 0.29 1.00 9680 17461
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.04 0.07 1.88 2.18 1.00 34189 23133
## polymorphismyes 0.16 0.09 -0.02 0.34 1.00 50666 28279
## T_bole 0.19 0.05 0.09 0.29 1.00 47745 30749
## T_centroid_lat 0.09 0.05 -0.00 0.19 1.00 50836 31777
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 56.56 52.85 12.46 202.97 1.00 40605 31129
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_points_1)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.267 (95% CI [0.108, 0.450])
## Marginal R2: 0.225 (95% CI [0.072, 0.385])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_points_1)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(temp_zones_points_1))temp_zones_polygon_1 <- brm(
temp_zones_polygon~polymorphism+T_bole+T_centroid_lat+ (1| gr(species, cov = A)) ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(temp_zones_polygon_1)summary(temp_zones_polygon_1)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: datos_filtered_total (Number of observations: 86)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 86)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.08 0.07 0.00 0.25 1.00 13062 16673
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.55 0.07 2.40 2.69 1.00 34319 25230
## polymorphismyes 0.00 0.10 -0.20 0.20 1.00 45404 29705
## T_bole 0.03 0.06 -0.09 0.13 1.00 36527 29929
## T_centroid_lat -0.18 0.06 -0.29 -0.07 1.00 38854 30488
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 9.48 3.40 5.15 17.34 1.00 37365 22585
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_polygon_1)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.243 (95% CI [0.090, 0.395])
## Marginal R2: 0.206 (95% CI [0.059, 0.357])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_polygon_1)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(temp_zones_polygon_1))all_models<-NULL
for(mod in c("brm_latrange_1","area_polygon_1","eco_reg_points_1","eco_reg_polygon_1","temp_zones_polygon_1","temp_zones_points_1")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
labs(x="Estimate",y="Models")Plot of the model
paleta1<-c("#1ABEC6","#FF5B00")
dataset_plot<-datos_filtered_total %>% drop_na(T_lat_range|polymorphism|T_bole|T_centroid_lat)
dataset_plot$predict<-predict(brm_latrange_1,type="response")[,"Estimate"]
dataset_plot %>% ggplot(aes(x=polymorphism,y=predict,fill=polymorphism))+geom_point(aes(x=polymorphism,y=T_lat_range),shape = 21,size=3, position = position_jitterdodge(),alpha=0.5)+geom_violin(aes(x=polymorphism,y=T_lat_range),alpha=0.1, position = position_dodge(width = .75),size=1)+
stat_summary(fun = mean,aes(color = polymorphism,group=polymorphism),fun.min = function(x) mean(x) - (2*se(x)),fun.max = function(x) mean(x)+(2*se(x)),geom = "pointrange",shape=22,size=1.5,col="black")+scale_fill_manual(values=paleta1)+scale_colour_manual(values=paleta1)+theme_classic()+labs(x="Colour polymorphism",y="Latitudinal range")To eliminate any false association caused by sampling bias, we repeated the above analyses with a reduced dataset. The subset was created by calculating a linear regression between the number of geographical records and the geographical area of the regions described in the WSC (a positive relationship), and then discarding species outside the lower boundary of the 50% predictive confidence interval (Quantile 0.75 and Quantile 0.25). In this way we only kept species with a small number of records when their WSC calculated range was calculated as very small (Predictive interval subset; supplementary figure 2). This approach is different from using a threshold for the number of points because it acknowledges that some species will have fewer records if their range is very restricted.
Let’s generate the dataset
no_island<-datos_filtered %>% filter(cat_island!="island") %>% drop_na(n_points) %>% drop_na(area_polygon) %>%drop_na(polymorphism)
no_island<-na.omit(no_island)
no_island[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(no_island[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()
lm_points<-lm(T_points~T_area_countries_wsc,data=no_island)
summary(lm_points)##
## Call:
## lm(formula = T_points ~ T_area_countries_wsc, data = no_island)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3549 -0.5525 -0.0333 0.5903 1.7387
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.694e+00 3.466e-01 -4.889 5.73e-06 ***
## T_area_countries_wsc 3.289e-04 6.445e-05 5.103 2.50e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8659 on 74 degrees of freedom
## Multiple R-squared: 0.2603, Adjusted R-squared: 0.2503
## F-statistic: 26.04 on 1 and 74 DF, p-value: 2.503e-06
no_island<-cbind(no_island,predict(lm_points,interval="prediction",level=0.50))## this plots the q10% and q90%## Warning in predict.lm(lm_points, interval = "prediction", level = 0.5): predictions on current data refer to _future_ responses
ggplot(no_island,aes(T_area_countries_wsc,T_points))+geom_point(size=3,aes(col=polymorphism))+ geom_smooth(method = "lm",level=0.99)+geom_line(aes(y=upr),col="red")+geom_line(aes(y=lwr),col="red")+theme_bw()## `geom_smooth()` using formula = 'y ~ x'
##subset based on the prediction intervals
pi_subset<-no_island[!no_island$T_points<no_island$lwr,]Number of colour monomorphic and polymorphic species after filtering
table(pi_subset$polymorphism)##
## no yes
## 35 24
Correlation plot between the variables
cor_matrix <- cor(na.omit(pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))
colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )Standardize variable before the analysis, excluding the count variables
pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()Now, let’s prepare the dataset and tree so they match, this is super important. Your phylogeny names need to match a column of data
Let’s run the models!
Evaluate if the colour monomorphic and colour polymorphic species differ in the number of records
set.seed(30011994)
brm_points <- brm(
n_points ~ polymorphism+(1| gr(species, cov = A)) ,
data = pi_subset,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
## Warning: There were 34494 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 1.06, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
pairs(brm_points)summary(brm_points)## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Family: poisson
## Links: mu = log
## Formula: n_points ~ polymorphism + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.55 0.24 2.13 3.07 1.03 165 401
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 6.68 0.84 4.95 8.37 1.04 66 106
## polymorphismyes 0.93 0.54 -0.14 2.13 1.06 68 107
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_points)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 1.000 (95% CI [0.999, 1.000])
## Marginal R2: 0.056 (95% CI [9.213e-11, 0.457])
## Check if the predicted values fits the rawdata
pp_check(brm_points)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_points))They do not have differences in the number of records
Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid
set.seed(30011994)
brm_centroid <- brm(
T_centroid_lat ~ polymorphism+(1| gr(species, cov = A)) ,
data = pi_subset,
family = skew_normal(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
## Warning: There were 6550 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 6990 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 1.13, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
pairs(brm_centroid)summary(brm_centroid)## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 6550 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_centroid_lat ~ polymorphism + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.09 0.17 0.70 1.39 1.03 245 1290
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.17 0.37 -0.85 0.61 1.03 89 2175
## polymorphismyes -0.11 0.26 -0.61 0.28 1.11 23 160
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.20 0.20 0.01 0.69 1.10 37 56
## alpha -1.58 3.94 -8.29 6.77 1.04 91 6601
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
pp_check(brm_centroid)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_centroid))They do not have differences in the latitude of the centroid
Evaluate if the colour monomorphic and colour polymorphic species differ in the body length
set.seed(30011994)
brm_bole <- brm(
T_bole ~ polymorphism+(1| gr(species, cov = A)) ,
data = pi_subset,
family = gaussian(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
## Warning: There were 4435 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 8599 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 1.11, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
pairs(brm_bole)summary(brm_bole)## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 4435 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_bole ~ polymorphism + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.03 0.13 0.77 1.25 1.08 32 487
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.05 0.32 -0.59 0.69 1.04 2179 3315
## polymorphismyes -0.03 0.20 -0.41 0.38 1.03 156 3513
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.19 0.14 0.02 0.51 1.09 37 90
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_bole)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.971 (95% CI [0.809, 1.000])
## Marginal R2: 0.005 (95% CI [1.525e-13, 0.035])
## Check if the predicted values fits the rawdata
pp_check(brm_bole)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_bole))They do not have differences in body length
let’s Evaluate the association fo the predictors
lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=pi_subset)
check_collinearity(lm_lat_range)The predictors are not collinear, we can use all of them in the models
set.seed(30011994)
brm_latrange_2 <- brm(
T_lat_range ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = student(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
pairs(brm_latrange_2)summary(brm_latrange_2)## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: student
## Links: mu = identity; sigma = identity; nu = identity
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.44 0.32 0.02 1.13 1.00 1394 1541
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.11 0.23 -0.58 0.34 1.00 15179 9107
## polymorphismyes 0.27 0.24 -0.21 0.75 1.00 22891 24974
## T_bole 0.14 0.14 -0.14 0.42 1.00 24891 25625
## T_centroid_lat -0.47 0.16 -0.76 -0.14 1.00 11503 15156
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.69 0.16 0.31 0.95 1.00 1687 1435
## nu 20.13 13.90 2.87 55.25 1.00 26351 13427
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_latrange_2)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.426 (95% CI [0.155, 0.874])
## Marginal R2: 0.345 (95% CI [0.143, 0.512])
## Check if the predicted values fits the rawdata
pp_check(brm_latrange_2)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_latrange_2))area_polygon_2 <- brm(
T_area_polygon ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)),
data = pi_subset,
family = skew_normal(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
## Warning: There were 1 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
pairs(area_polygon_2)summary(area_polygon_2)## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.43 0.26 0.02 1.01 1.00 3907 5467
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.07 0.25 -0.60 0.38 1.00 19542 15643
## polymorphismyes 0.15 0.27 -0.37 0.69 1.00 21713 24500
## T_bole 0.18 0.15 -0.12 0.48 1.00 28270 27489
## T_centroid_lat 0.04 0.17 -0.30 0.38 1.00 23833 26439
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.89 0.14 0.61 1.16 1.00 5582 5341
## alpha 3.70 2.31 -0.67 8.90 1.00 17742 19912
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(area_polygon_2)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.205 (95% CI [0.010, 0.540])
## Marginal R2: 0.078 (95% CI [4.048e-04, 0.215])
## Check if the predicted values fits the rawdata
pp_check(area_polygon_2)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(area_polygon_2))To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here
BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records
set.seed(30011994)
eco_reg_points_2 <- brm(
eco_reg_points ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
pairs(eco_reg_points_2)summary(eco_reg_points_2)## Family: poisson
## Links: mu = log
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.92 0.10 0.74 1.15 1.00 9305 16893
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.26 0.31 2.65 3.87 1.00 7750 14109
## polymorphismyes 0.31 0.19 -0.06 0.68 1.00 7719 14471
## T_bole 0.32 0.13 0.06 0.57 1.00 8831 14818
## T_centroid_lat -0.01 0.12 -0.24 0.22 1.00 8440 14479
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_points_2)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.966 (95% CI [0.946, 0.980])
## Marginal R2: 0.166 (95% CI [0.005, 0.441])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_points_2)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(eco_reg_points_2))BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon
eco_reg_polygon_2 <- brm(
eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = poisson(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
pairs(eco_reg_polygon_2)summary(eco_reg_polygon_2)## Family: poisson
## Links: mu = log
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.04 0.11 0.84 1.29 1.00 6499 12865
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.82 0.35 3.12 4.51 1.00 6261 11414
## polymorphismyes 0.32 0.21 -0.08 0.72 1.00 6856 11511
## T_bole 0.25 0.14 -0.03 0.54 1.00 7193 12813
## T_centroid_lat -0.10 0.13 -0.35 0.15 1.00 6916 11777
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_polygon_2)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.981 (95% CI [0.971, 0.989])
## Marginal R2: 0.148 (95% CI [0.001, 0.440])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_polygon_2)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(eco_reg_polygon_2))Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here
temp_zones_points_2 <- brm(
temp_zones_points~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
data = pi_subset,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
pairs(temp_zones_points_2)summary(temp_zones_points_2)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.08 0.06 0.00 0.21 1.00 18150 20046
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.19 0.07 2.04 2.32 1.00 50492 29794
## polymorphismyes 0.09 0.10 -0.10 0.28 1.00 68882 30031
## T_bole 0.19 0.05 0.08 0.30 1.00 56006 32956
## T_centroid_lat 0.06 0.05 -0.04 0.17 1.00 60393 31710
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 111.12 79.86 25.05 326.52 1.00 72428 30306
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_points_2)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.369 (95% CI [0.167, 0.549])
## Marginal R2: 0.329 (95% CI [0.119, 0.507])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_points_2)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(temp_zones_points_2))temp_zones_polygon_2 <- brm(
temp_zones_polygon~polymorphism+T_bole+T_centroid_lat+ (1| gr(species, cov = A)) ,
data = pi_subset,
family = negbinomial(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 20000
)## Compiling Stan program...
## Start sampling
pairs(temp_zones_polygon_2)summary(temp_zones_polygon_2)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A))
## Data: pi_subset (Number of observations: 59)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 59)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.12 0.09 0.00 0.33 1.00 12553 16198
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.59 0.09 2.40 2.76 1.00 42286 26261
## polymorphismyes -0.07 0.12 -0.31 0.17 1.00 66712 31409
## T_bole 0.01 0.07 -0.13 0.14 1.00 47442 31032
## T_centroid_lat -0.24 0.07 -0.38 -0.11 1.00 61767 31816
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 11.35 8.47 4.79 27.69 1.00 27750 17123
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_polygon_2)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.383 (95% CI [0.170, 0.552])
## Marginal R2: 0.322 (95% CI [0.116, 0.504])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_polygon_2)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(temp_zones_polygon_2))all_models<-NULL
for(mod in c("brm_latrange_2","area_polygon_2","eco_reg_points_2","eco_reg_polygon_2","temp_zones_polygon_2","temp_zones_points_2")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
labs(x="Estimate",y="Models")Plot of the model
paleta1<-c("#1ABEC6","#FF5B00")
dataset_plot<-pi_subset %>% drop_na(T_lat_range|polymorphism|T_bole|T_centroid_lat)
dataset_plot$predict<-predict(brm_latrange_2,type="response")[,"Estimate"]
dataset_plot %>% ggplot(aes(x=polymorphism,y=predict,fill=polymorphism))+geom_point(aes(x=polymorphism,y=T_lat_range),shape = 21,size=3, position = position_jitterdodge(),alpha=0.5)+geom_violin(aes(x=polymorphism,y=T_lat_range),alpha=0.1, position = position_dodge(width = .75),size=1)+
stat_summary(fun = mean,aes(color = polymorphism,group=polymorphism),fun.min = function(x) mean(x) - (2*se(x)),fun.max = function(x) mean(x)+(2*se(x)),geom = "pointrange",shape=22,size=1.5,col="black")+scale_fill_manual(values=paleta1)+scale_colour_manual(values=paleta1)+theme_classic()+labs(x="Colour polymorphism",y="Latitudinal range")We observed that the models with continous variables have values close to 0 and that the climatic zones models have random effects low variaces close to 0. This means that the phylogentic relationships of the individuals are not having a major effect on these models.
hyp <- "sd_species__Intercept^2 / (sd_species__Intercept^2 + sigma^2) = 0"
lat_range_sig <- hypothesis(brm_latrange_1, hyp, class = NULL)
area_sig<-hypothesis(area_polygon_1, hyp, class = NULL)
ggplot() + geom_histogram(aes(x = lat_range_sig$samples$H1, fill="Latitudinal range model"), alpha = 0.5)+
geom_histogram(aes(x = area_sig$samples$H1,fill="Area polygon model"), alpha = 0.5)+labs(x="Pagel's lambda",y="Count")+theme_classic()## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
In consequence, we decided to run a set of models without considering the phylogeny. This let us include more species with good geographic records but that were not include in the phylogenetic reconstruction due to lack of genetic data.
join_dataset<-read_csv("data_total.csv",col_names=T)
join_dataset$species<-gsub(pattern=" ", replacement="_",join_dataset$species)
#keep unique rows
join_dataset<-distinct(join_dataset,species,.keep_all = TRUE)
###Tranform island as binary
join_dataset<-join_dataset %>% mutate(bin_island=ifelse(cat_island=="island"|cat_island=="island_continent",1,0))
#Let's modify the dataset to deal with polytipic
join_dataset$polymorphism[which(join_dataset$polymorphism %in% c("polytipic","possible polytipic","pattern variable")==TRUE)]<-NAArachnids is one of the groups with the poorest geographic information available in public databases. For instance, in our data ~52% of the species has less than 50 geographical records
species_points<-join_dataset %>% drop_na(n_points)
species_geo<-nrow(species_points[species_points$n_points<50,])/nrow(species_points)*100
print(paste0(species_geo,"%"," of the species with geographical information has less than 50 geographical records"))## [1] "52.0179372197309% of the species with geographical information has less than 50 geographical records"
To account for this, we decided to calculated the mean and its 95% confidence interval (CI) for the number of geographical records available for all the species. We excluded species from the subsequent analyses that fell outside the lower CI.
#Due to high vari
l_points<-na.omit(log(join_dataset$n_points))
##Let's calculate the 95% around the mean
library(boot)
data <- data.frame(xs = l_points)
bo <- boot(data[, "xs", drop = FALSE], statistic=meanfun, R=5000)
mean_ci<-boot.ci(bo, conf=0.95, type="bca")
ggplot(tibble(x=bo$t[,1]), aes(x=x)) +geom_density()+geom_segment(x=mean_ci$bca[4],xend=mean_ci$bca[5],y=0,yend=0,color="blue",size=2,lineend="round")##Remove species with low number of records
datos_filtered<-join_dataset %>% filter(n_points>=exp(mean_ci$bca[4]))
#datos_filtered<-na.omit(datos_filtered)
##Let's save this dataset for further analyses
data_without_filtering<-datos_filteredall the predictors seems skewed or not uniform distributed, let’s modify some predictors that may affect the regression due to their non-normal distribution
datos_filtered$T_centroid_lat<-abs(datos_filtered$centroid_lat)
datos_filtered$T_lat_range<-sqrt(abs(datos_filtered$lat_range))
datos_filtered$T_area_polygon<-sqrt(datos_filtered$area_polygon+1)
datos_filtered$T_lat_range_wsc<-abs(datos_filtered$lat_range_wsc)
datos_filtered$T_area_countries_wsc<-sqrt(datos_filtered$area_countries_wsc)
datos_filtered$T_points<-log(datos_filtered$n_points)
datos_filtered$T_bole<-log(datos_filtered$bole_female)Correlation plot between the variables
cor_matrix <- cor(na.omit(pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))
colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )Remove species from islands that can affect calculations due to their geographic limit for dispersion
datos_filtered_total<-datos_filtered %>% filter(cat_island != "island") %>% data.frame()Standardize variable before the analysis, excluding the count variables
datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()Number of colour monomorphic and polymorphic species
table(datos_filtered_total$polymorphism)##
## no yes
## 53 41
Let’s run the models!
Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid
set.seed(30011994)
brm_centroid <- brm(
T_centroid_lat ~ polymorphism,
data = datos_filtered_total,
family = skew_normal(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(brm_centroid)summary(brm_centroid)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_centroid_lat ~ polymorphism
## Data: datos_filtered_total (Number of observations: 94)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.24 0.14 -0.03 0.51 1.00 25268 23684
## polymorphismyes -0.49 0.21 -0.90 -0.08 1.00 25413 23777
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.01 0.08 0.87 1.19 1.00 22542 22290
## alpha -0.69 1.86 -4.70 2.71 1.00 17557 18508
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_centroid)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.058 (95% CI [2.813e-09, 0.149])
## Check if the predicted values fits the rawdata
pp_check(brm_centroid)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_centroid))They do not have differences in the latitude of the centroid
Evaluate if the colour monomorphic and colour polymorphic species differ in the body length
set.seed(30011994)
brm_bole <- brm(
T_bole ~ polymorphism,
data = datos_filtered_total,
family = gaussian(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(brm_bole)summary(brm_bole)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_bole ~ polymorphism
## Data: datos_filtered_total (Number of observations: 93)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.15 0.13 -0.41 0.12 1.00 29936 24931
## polymorphismyes 0.29 0.20 -0.11 0.69 1.00 30839 24686
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.98 0.07 0.85 1.14 1.00 30142 25118
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_bole)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.022 (95% CI [1.736e-10, 0.091])
## Check if the predicted values fits the rawdata
pp_check(brm_bole)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_bole))They do not have differences in their body length
let’s Evaluate the association fo the predictors
lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=datos_filtered_total)
check_collinearity(lm_lat_range)The predictors are not collinear, we can use all of them in the models
set.seed(30011994)
brm_latrange_3 <- brm(
T_lat_range ~ polymorphism+T_bole+T_centroid_lat,
data = datos_filtered_total,
family = gaussian(),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(brm_latrange_3)summary(brm_latrange_3)## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 93)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.13 0.12 -0.37 0.12 1.00 31251 26998
## polymorphismyes 0.21 0.19 -0.16 0.58 1.00 30427 26572
## T_bole 0.16 0.10 -0.04 0.36 1.00 27478 27499
## T_centroid_lat -0.29 0.10 -0.49 -0.09 1.00 27541 26634
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.87 0.07 0.75 1.01 1.00 31497 26771
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_latrange_3)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.212 (95% CI [0.088, 0.332])
## Check if the predicted values fits the rawdata
pp_check(brm_latrange_3)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(brm_latrange_3))area_polygon_3 <- brm(
T_area_polygon ~ polymorphism+T_bole+T_centroid_lat,
data =datos_filtered_total,
family = skew_normal(),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(area_polygon_3)summary(area_polygon_3)## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 92)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.06 0.13 -0.32 0.21 1.00 25150 26964
## polymorphismyes 0.16 0.19 -0.21 0.54 1.00 26544 25911
## T_bole 0.14 0.10 -0.06 0.34 1.00 25642 24010
## T_centroid_lat 0.10 0.11 -0.11 0.32 1.00 23371 25990
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.97 0.08 0.83 1.15 1.00 22407 24677
## alpha 3.44 1.27 1.51 6.46 1.00 21139 15885
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(area_polygon_3)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.041 (95% CI [5.077e-04, 0.115])
## Check if the predicted values fits the rawdata
pp_check(area_polygon_3)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
plot(conditional_effects(area_polygon_3))Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here
temp_zones_points_3 <- brm(
temp_zones_points~ polymorphism+T_bole+T_centroid_lat ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 10000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(temp_zones_points_3)summary(temp_zones_points_3)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 93)
## Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup draws = 20000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.06 0.06 1.95 2.17 1.00 16109 13283
## polymorphismyes 0.11 0.08 -0.06 0.27 1.00 17547 14388
## T_bole 0.19 0.04 0.10 0.28 1.00 15352 14351
## T_centroid_lat 0.10 0.04 0.01 0.19 1.00 15517 13291
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 42.56 40.00 11.09 154.44 1.00 13946 9995
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_points_3)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.201 (95% CI [0.073, 0.332])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_points_3)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
#plot(conditional_effects(temp_zones_points))temp_zones_polygon_3 <- brm(
temp_zones_polygon~polymorphism+T_bole+T_centroid_lat ,
data = datos_filtered_total,
family = negbinomial(),
#prior = prior,
control = list(adapt_delta = 0.999, max_treedepth=20)
,chains = 4, cores = 4, iter = 10000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(temp_zones_polygon_3)summary(temp_zones_polygon_3)## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat
## Data: datos_filtered_total (Number of observations: 92)
## Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup draws = 20000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.59 0.07 2.46 2.72 1.00 15090 13188
## polymorphismyes -0.04 0.10 -0.24 0.16 1.00 15822 13734
## T_bole 0.05 0.05 -0.05 0.16 1.00 14527 13067
## T_centroid_lat -0.17 0.05 -0.28 -0.06 1.00 13719 12101
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 8.22 2.35 4.76 13.92 1.00 14735 11042
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_polygon_3)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.196 (95% CI [0.058, 0.328])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_polygon_3)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
#plot(conditional_effects(temp_zones_polygon))all_models<-NULL
for(mod in c("brm_latrange_3","area_polygon_3","temp_zones_polygon_3","temp_zones_points_3")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
labs(x="Estimate",y="Models")Considering that the presence on islands can be an indicator of range expansion, we also recorded whether species were distributed on islands by overlapping both sources of geographical distribution (occurrences and WSC) with the global shoreline vector from the islands database (Sayre et al., 2019).
We decided to test the association between colour polymorphism and the presence on islands using two datasets. The first one was using the species occurrences after discarding those species with poor geographic records as done here
##Remove species with low number of records
data_filtered_phylogeny<-data_filtered_phylogeny %>% drop_na(polymorphism) %>% drop_na(bin_island)
table(data_filtered_phylogeny$polymorphism)##
## no yes
## 56 35
This dataset includes 56 colour monomorphic species and 35 colour polymorphic species.
To increase the number of species in our analysis, we created a second dataset without filtering species based on the number of geographic records. We determined the presence on islands using the geographic distribution information available on the World Spider Catalog.
dataset_all_species_phylogeny<-dataset_all_species_phylogeny %>% drop_na(n_points) %>%drop_na(polymorphism) %>% drop_na(bin_island) %>% drop_na(cat_island_points)
table(dataset_all_species_phylogeny$polymorphism)##
## no yes
## 125 61
This second dataset includes 125 colour monomorphic species and 61 colour polymorphic species.
## Family: bernoulli
## Links: mu = logit
## Formula: bin_island ~ 1 + polymorphism + log(bole_female) + (1 | gr(species, cov = A))
## Data: data_filtered_phylogeny (Number of observations: 90)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 90)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 3.86 4.01 0.29 13.01 1.00 4321 6240
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.33 4.18 -3.40 12.37 1.00 15262 10378
## polymorphismyes 4.73 3.38 1.14 13.14 1.00 8214 6157
## logbole_female -0.13 1.60 -3.01 3.15 1.00 15035 10446
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.463 (95% CI [0.069, 0.853])
## Marginal R2: 0.023 (95% CI [1.740e-30, 0.247])
With this dataset there seems to be an association between
colour polymorphism and the presence on islands
## .
## 0 1
## 0.2142857 0.7857143
## .
## 0 1
## 0.02857143 0.97142857
tree=check_and_fix_ultrametric(join_tree)
missing<-tree$tip.label[which(tree$tip.label %in% dataset_all_species_phylogeny$species==FALSE)]
island_dataset_tree<-drop.tip(tree,missing)
island_dataset_tree$edge.length <- island_dataset_tree$edge.length / (max(island_dataset_tree$edge.length))
A <- ape::vcv(island_dataset_tree,corr = TRUE)
set.seed(30011994)
brm_island_2 <- brm(
bin_island ~ 1 + polymorphism + log(bole_female) + (1| gr(species, cov = A)) ,
data = dataset_all_species_phylogeny,
family = bernoulli(),
#prior = prior,
data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
## Warning: There were 10000 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
pairs(brm_island_2)summary(brm_island_2)## Family: bernoulli
## Links: mu = logit
## Formula: bin_island ~ 1 + polymorphism + log(bole_female) + (1 | gr(species, cov = A))
## Data: dataset_all_species_phylogeny (Number of observations: 113)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~species (Number of levels: 113)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 5.93 10.86 0.86 20.84 1.00 1888 2723
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -2.91 13.12 -19.80 4.68 1.00 4466 2961
## polymorphismyes 5.29 7.74 1.26 16.36 1.00 3418 3049
## logbole_female 2.72 5.09 -0.20 10.18 1.00 3430 3278
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_island_2)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.579 (95% CI [0.209, 0.949])
## Marginal R2: 0.065 (95% CI [1.276e-24, 0.398])
## Check if the predicted values fits the rawdata
pp_check(brm_island_2)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
##Preliminary plots
#plot(conditional_effects(With this dataset there seems to be an association between colour polymorphism and the presence on islands
## .
## 0 1
## 0.472 0.528
## .
## 0 1
## 0.1803279 0.8196721
A similar pattern is obtained when with run the model without the phylogeny
set.seed(30011994)
brm_island_3 <- brm(
bin_island ~ 1 + polymorphism + log(bole_female),
data = dataset_all_species_phylogeny,
family = bernoulli(),
#prior = prior,
#data2 = list(A = A),
control = list(adapt_delta = 0.999, max_treedepth=10)
,chains = 4, cores = 4, iter = 20000
)## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(brm_island_3)summary(brm_island_3)## Family: bernoulli
## Links: mu = logit
## Formula: bin_island ~ 1 + polymorphism + log(bole_female)
## Data: dataset_all_species_phylogeny (Number of observations: 113)
## Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
## total post-warmup draws = 40000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.03 0.87 -1.73 1.68 1.00 21025 22160
## polymorphismyes 2.21 0.84 0.77 4.06 1.00 14330 14208
## logbole_female 0.48 0.42 -0.33 1.32 1.00 19339 20752
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_island_3)## # Bayesian R2 with Compatibility Interval
##
## Conditional R2: 0.105 (95% CI [0.024, 0.194])
## Check if the predicted values fits the rawdata
pp_check(brm_island_3)## Using 10 posterior draws for ppc type 'dens_overlay' by default.
Lets observe how polymorphic and monomorphic species differ according to all the predictor. To build this graph we will use the linear models with the total data set and the Island model with the highest number of species.
all_models<-NULL
for(mod in c("brm_latrange_1","area_polygon_1","eco_reg_points_1","eco_reg_polygon_1","temp_zones_polygon_1","temp_zones_points_1","brm_island_2")){
baye_mode = get(mod)
bayes_results<-baye_mode %>%
spread_draws(b_polymorphismyes)
bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
all_models<-bind_rows(all_models, bayes_results)
}
all_models$model<-factor(all_models$model,levels=c("brm_island_2","temp_zones_polygon_1","temp_zones_points_1","eco_reg_polygon_1","eco_reg_points_1","area_polygon_1","brm_latrange_1"))
na.omit(all_models) %>% ggplot(aes(y = model, x = b_polymorphismyes, fill = after_stat(x < 0))) +
stat_halfeye()+
theme_classic()+
geom_vline(xintercept = 0, linetype = "dashed",col="black",size=0.8)+
labs(x="Estimate",y="Models")+
xlim(-0.5,5)+
scale_fill_manual(values = c("#BCCAEF","#D66D79" ))## Warning: Removed 13295 rows containing missing values (`stat_slabinterval()`).
From all the variables and data filtering explored, we can conclude that monomorphic and polymorphic spider species in our dataset only differ in their presence on islands